{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 2: Essential DataFrame Operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "pd.set_option('max_columns', 4, 'max_rows', 10, 'max_colwidth', 12)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Introduction"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selecting Multiple DataFrame Columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv('data/movie.csv')\n",
    "movie_actor_director = movies[['actor_1_name', 'actor_2_name',\n",
    "    'actor_3_name', 'director_name']]\n",
    "movie_actor_director.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "type(movies[['director_name']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "type(movies['director_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "type(movies.loc[:, ['director_name']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "type(movies.loc[:, 'director_name'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "cols = ['actor_1_name', 'actor_2_name',\n",
    "        'actor_3_name', 'director_name']\n",
    "movie_actor_director = movies[cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies['actor_1_name', 'actor_2_name',\n",
    "      'actor_3_name', 'director_name']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Selecting Columns with Methods"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv('data/movie.csv')\n",
    "def shorten(col):\n",
    "    return (col.replace('facebook_likes', 'fb')\n",
    "               .replace('_for_reviews', '')\n",
    "    )\n",
    "movies = movies.rename(columns=shorten)\n",
    "movies.get_dtype_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.select_dtypes(include='int').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.select_dtypes(include='number').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.select_dtypes(include=['int', 'object']).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.select_dtypes(exclude='float').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.filter(like='fb').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "cols = ['actor_1_name', 'actor_2_name',\n",
    "        'actor_3_name', 'director_name']\n",
    "movies.filter(items=cols).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.filter(regex=r'\\d').head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ordering Column Names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv('data/movie.csv')\n",
    "def shorten(col):\n",
    "    return (col.replace('facebook_likes', 'fb')\n",
    "               .replace('_for_reviews', '')\n",
    "    )\n",
    "movies = movies.rename(columns=shorten)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "cat_core = ['movie_title', 'title_year',\n",
    "            'content_rating', 'genres']\n",
    "cat_people = ['director_name', 'actor_1_name',\n",
    "              'actor_2_name', 'actor_3_name']\n",
    "cat_other = ['color', 'country', 'language',\n",
    "             'plot_keywords', 'movie_imdb_link']\n",
    "cont_fb = ['director_fb', 'actor_1_fb',\n",
    "           'actor_2_fb', 'actor_3_fb',\n",
    "           'cast_total_fb', 'movie_fb']\n",
    "cont_finance = ['budget', 'gross']\n",
    "cont_num_reviews = ['num_voted_users', 'num_user',\n",
    "                    'num_critic']\n",
    "cont_other = ['imdb_score', 'duration',\n",
    "               'aspect_ratio', 'facenumber_in_poster']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "new_col_order = cat_core + cat_people + \\\n",
    "                cat_other + cont_fb + \\\n",
    "                cont_finance + cont_num_reviews + \\\n",
    "                cont_other\n",
    "set(movies.columns) == set(new_col_order)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies[new_col_order].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Summarizing a DataFrame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv('data/movie.csv')\n",
    "movies.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.ndim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "len(movies)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.min()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.describe().T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.describe(percentiles=[.01, .3, .99]).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.min(skipna=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Chaining DataFrame Methods"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies = pd.read_csv('data/movie.csv')\n",
    "def shorten(col):\n",
    "    return (col.replace('facebook_likes', 'fb')\n",
    "               .replace('_for_reviews', '')\n",
    "    )\n",
    "movies = movies.rename(columns=shorten)\n",
    "movies.isnull().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(movies\n",
    "   .isnull()\n",
    "   .sum()\n",
    "   .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.isnull().sum().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.isnull().any().any()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies.isnull().get_dtype_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "movies[['color', 'movie_title', 'color']].max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "with pd.option_context('max_colwidth', 20):\n",
    "    movies.select_dtypes(['object']).fillna('').max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "with pd.option_context('max_colwidth', 20):\n",
    "    (movies\n",
    "        .select_dtypes(['object'])\n",
    "        .fillna('')\n",
    "        .max()\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DataFrame Operations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "colleges = pd.read_csv('data/college.csv')\n",
    "colleges + 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "colleges = pd.read_csv('data/college.csv', index_col='INSTNM')\n",
    "college_ugds = colleges.filter(like='UGDS_')\n",
    "college_ugds.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "name = 'Northwest-Shoals Community College'\n",
    "college_ugds.loc[name]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.loc[name].round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds.loc[name] + .0001).round(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds + .00501"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds + .00501) // .01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds_op_round = (college_ugds + .00501) // .01 / 100\n",
    "college_ugds_op_round.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds_round = (college_ugds + .00001).round(2)\n",
    "college_ugds_round"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds_op_round.equals(college_ugds_round)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    ".045 + .005"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college2 = (college_ugds\n",
    "    .add(.00501) \n",
    "    .floordiv(.01) \n",
    "    .div(100)\n",
    ")\n",
    "college2.equals(college_ugds_op_round)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparing Missing Values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "np.nan == np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "None == None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "np.nan > 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "5 > np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "np.nan != 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Getting ready"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college = pd.read_csv('data/college.csv', index_col='INSTNM')\n",
    "college_ugds = college.filter(like='UGDS_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds == .0019"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_self_compare = college_ugds == college_ugds\n",
    "college_self_compare.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_self_compare.all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds == np.nan).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.equals(college_ugds)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.eq(.0019)    # same as college_ugds == .0019"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "from pandas.testing import assert_frame_equal\n",
    "assert_frame_equal(college_ugds, college_ugds) is None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transposing the direction of a DataFrame operation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college = pd.read_csv('data/college.csv', index_col='INSTNM')\n",
    "college_ugds = college.filter(like='UGDS_')\n",
    "college_ugds.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.count(axis='columns').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.sum(axis='columns').head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.median(axis='index')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds_cumsum = college_ugds.cumsum(axis=1)\n",
    "college_ugds_cumsum.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### See also"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Determining college campus diversity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "pd.read_csv('data/college_diversity.csv', index_col='School')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How to do it\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college = pd.read_csv('data/college.csv', index_col='INSTNM')\n",
    "college_ugds = college.filter(like='UGDS_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds.isnull()\n",
    "   .sum(axis='columns')\n",
    "   .sort_values(ascending=False)\n",
    "   .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds = college_ugds.dropna(how='all')\n",
    "college_ugds.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.ge(.15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "diversity_metric = college_ugds.ge(.15).sum(axis='columns')\n",
    "diversity_metric.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "diversity_metric.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "diversity_metric.sort_values(ascending=False).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "college_ugds.loc[['Regency Beauty Institute-Austin',\n",
    "                   'Central Texas Beauty College-Temple']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "us_news_top = ['Rutgers University-Newark',\n",
    "                  'Andrews University',\n",
    "                  'Stanford University',\n",
    "                  'University of Houston',\n",
    "                  'University of Nevada-Las Vegas']\n",
    "diversity_metric.loc[us_news_top]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### How it works\\..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### There\\'s more\\..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds\n",
    "   .max(axis=1)\n",
    "   .sort_values(ascending=False)\n",
    "   .head(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "(college_ugds > .01).all(axis=1).any()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "### See also"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "cell_metadata_filter": "-all",
   "main_language": "python",
   "notebook_metadata_filter": "-all"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
